library(sf)
## Warning: package 'sf' was built under R version 3.6.3
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(tidyverse)
## -- Attaching packages ---------------
## v ggplot2 3.2.0     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   0.8.3
## v tidyr   0.8.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## -- Conflicts ------------------------
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(mapview)
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.6.2
# Read the San Jose parks layer from the file geodatabase on the shared drive.
sj_parks <- st_read(
  dsn = "P:/SFBI/Data Library/San_Jose/ParksSelection_2020_0428.gdb"
)
## Reading layer `ParksSelection_2020_0428' from data source `P:\SFBI\Data Library\San_Jose\ParksSelection_2020_0428.gdb' using driver `OpenFileGDB'
## Simple feature collection with 308 features and 47 fields
## geometry type:  GEOMETRY
## dimension:      XY
## bbox:           xmin: 6117585 ymin: 1881461 xmax: 6222035 ymax: 1982366
## CRS:            2227
# Reduce the raw parks layer to one row per LOCATIONID:
#   * keep rows whose OWNER is "PRNS", that have a LOCATIONID, and whose
#     PARKTYPE is one of the listed codes;
#   * cast the mixed-type geometry column to MULTIPOLYGON;
#   * within each LOCATIONID, keep the shortest NAME and sum SHAPE_Area.
sj_parks_clean <-
  sj_parks %>%
  filter(OWNER == "PRNS") %>%
  filter(!is.na(LOCATIONID)) %>%
  filter(PARKTYPE %in% c("COMGRD", "DOG", "NEIG", "OS", "REG", "SPRT")) %>%
  st_cast("MULTIPOLYGON") %>%
  mutate(NAME = as.character(NAME)) %>%
  # Sort by name length so that first(NAME) below picks the shortest name.
  arrange(LOCATIONID, nchar(NAME)) %>%
  group_by(LOCATIONID) %>%
  summarize(
    NAME = first(NAME),
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    AREA = sum(SHAPE_Area, na.rm = TRUE)
  )
# Load the daily Safegraph park records and keep only the first row seen for
# each location_name.
safegraph_parks <- filter(
  readRDS("all_parks_daily.rds"),
  !duplicated(location_name)
)
# First attempt: attach Safegraph records to the city parks by exact name
# (NAME on the parks side, location_name on the Safegraph side).
parks_join <- left_join(
  sj_parks_clean,
  safegraph_parks,
  by = c("NAME" = "location_name")
)
## Warning: Column `NAME`/`location_name` joining character vector and factor,
## coercing into character vector
# Parks that picked up no Safegraph record in the name join (date is missing).
failed_join <- filter(parks_join, is.na(date))

This is a map of all Safegraph parks (red circles) and all SJ GIS parks (polygon outlines).

# Overlay the cleaned city park polygons (outline only, labelled by NAME) with
# the Safegraph park points (red circles, labelled by location_name).
leaflet() %>%
  addTiles() %>%
  addPolygons(
    # Reproject to EPSG:4326 (longitude/latitude) for display on the web map.
    data = sj_parks_clean %>%
      st_transform(4326),
    fill = FALSE,
    label = ~NAME
  ) %>%
  addCircles(
    data = safegraph_parks,
    lat = ~latitude,
    lng = ~longitude,
    color = "red",
    label = ~location_name
  )
# Spatial join: turn the Safegraph longitude/latitude columns into point
# geometries (EPSG:4326) and join them onto the park polygons reprojected to
# the same CRS.
parks_spatialjoin <- st_join(
  st_transform(sj_parks_clean, 4326),
  st_as_sf(
    safegraph_parks,
    coords = c("longitude", "latitude"),
    crs = 4326
  )
)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
# Rows of the spatial join that received Safegraph data (date is present).
safegraph_success_firstpass <- filter(parks_spatialjoin, !is.na(date))

# Safegraph parks whose name is absent from the first-pass matches.
safegraph_fail_join <- filter(
  safegraph_parks,
  !(location_name %in% safegraph_success_firstpass$location_name)
)

# City parks that received no Safegraph data in the spatial join.
parks_fail_join <- filter(parks_spatialjoin, is.na(date))

This is a map showing, in blue, the SJ GIS parks that didn’t get a direct spatial join match and, in red, all the Safegraph parks that were left unmatched.

# Map the leftovers of the spatial join: unmatched city park outlines
# (labelled by NAME) and unmatched Safegraph points (red circles, labelled by
# location_name).
leaflet() %>%
  addTiles() %>%
  addPolygons(
    data = parks_fail_join %>%
      st_transform(4326),
    fill = FALSE,
    label = ~NAME
  ) %>%
  addCircles(
    data = safegraph_fail_join,
    lat = ~latitude,
    lng = ~longitude,
    color = "red",
    label = ~location_name
  )

Next, try a name-based join on the parks and Safegraph records that failed the spatial join.

# Second pass: match the still-unmatched parks to the still-unmatched
# Safegraph records by exact name, then keep only the rows that found a match.
join_secondpass <-
  parks_fail_join %>%
  dplyr::select(LOCATIONID, NAME, AREA, SHAPE) %>%
  left_join(
    # Copy location_name into a character NAME column to serve as the join key.
    mutate(safegraph_fail_join, NAME = as.character(location_name)),
    by = "NAME"
  ) %>%
  filter(!is.na(date))

These were the matches. Note how each red circle (the Safegraph point) falls outside its blue park outline, which is why the spatial join missed it.

# Map the second-pass (name-based) matches: each park outline together with
# the Safegraph point it was matched to by name.
leaflet() %>%
  addTiles() %>%
  addPolygons(
    data = join_secondpass %>%
      st_transform(4326),
    fill = FALSE,
    label = ~NAME
  ) %>%
  addCircles(
    # The joined table still carries the Safegraph latitude/longitude columns.
    data = join_secondpass,
    lat = ~latitude,
    lng = ~longitude,
    color = "red",
    label = ~location_name
  )
# Stack the name-matched rows under the spatially matched rows. The coordinate
# columns are removed from the second pass before binding.
safegraph_success_secondpass <- rbind(
  safegraph_success_firstpass,
  dplyr::select(join_secondpass, -latitude, -longitude)
)

# Re-read the full daily file and keep every row whose location_name was
# matched in either pass.
safegraph_parks_updated <- filter(
  readRDS("all_parks_daily.rds"),
  location_name %in% safegraph_success_secondpass$location_name
)

We just received the Safegraph geometry data, which could help with the joining as well.

# Read the Safegraph core POI + geometry extract (gzip-compressed CSV).
safegraph_geometry <- read_csv(
  file = "P:/SFBI/Restricted Data Library/Safegraph/SanJoseSunnyvaleSantaClara-CA-MSA-CORE_POI-GEOMETRY-2020_03-2020-04-20/core_poi-geometry.csv.gz"
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   naics_code = col_double(),
##   latitude = col_double(),
##   longitude = col_double(),
##   postal_code = col_double(),
##   phone_number = col_double(),
##   is_synthetic = col_logical(),
##   includes_parking_lot = col_logical()
## )
## See spec(...) for full column specifications.
# Parse the WKT text in polygon_wkt into an sf geometry column (EPSG:4326).
safegraph_geometry_sf <- st_as_sf(
  safegraph_geometry,
  wkt = "polygon_wkt",
  crs = 4326
)

# Keep only the POIs whose name appears among the Safegraph parks.
safegraph_parks_sf <- filter(
  safegraph_geometry_sf,
  location_name %in% safegraph_parks$location_name
)

# Interactive map of the selected Safegraph geometries.
mapview(safegraph_parks_sf)